# Script that controls for type of occupation in order to see whether the effect
# is driven by joblosers obtaining more candidacy friendly occupations after 
# job loss. 

## Last changed: 2025-05-23

sink("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Output/FigureA17.txt", split=TRUE)


library(PSweight)
library(cobalt)
library(MatchIt)
library(distances)
library(moreparty)
library(party)
library(caret)
library(bonsai)
library(randomForest)
library(tidymodels)
library(tidyverse)
library(rmarkdown)
library(tinytex)
library(tibble)
library(knitr)
library(car)
library(fst)
library(stargazer)
library(pryr)
library(stringr)
library(fst)
library(lubridate)
library(data.table)
library(summarytools)
library(ggplot2)
library(cowplot)
library(ggpubr)
library(haven)
library(broom)
library(fixest)
library(modelsummary)
library(gt)
library(MASS)
library(survey)
library(collapse)
library(readxl)
library(Hmisc)
library(DescTools)
library(ranger)
library(stablelearner)
library(future.apply)
library(lme4)
library(partykit)
library(ggiplot)

  SES <- read_fst("E:/ProjData/SESData/ses-SESData_9021.fst", as.data.table=T)
  SES2 <- read_fst("E:/ProjData/SESData/ses-SESData_231005.fst", as.data.table=T)
  SES2 <- SES2[Ar==1985, ]
  SES2 <- SES2[, .(LopNr, Ar, Yrke_ssyk)]
  setnames(SES2, "Yrke_ssyk", "ssyk_96_12")

  SES <- rbindlist(list(SES, SES2), fill = TRUE, use.names = TRUE)
  SES[, ssyk_96_12 := as.numeric(ssyk_96_12)]
  
  FodUppg <- read.fst("D:/SCB_ConPol/Fst/RTB/Fodelseuppg_2024.fst", as.data.table=T)
  FodUppg <- FodUppg[, cID := .N, by = LopNr][cID == 1 & !is.na(LopNr), ][, cID := NULL]  
  FodUppg <- FodUppg[, .(LopNr, FodArMan, UtlSvBakg, Kon)]
  FodUppg[, FodAr := floor(FodArMan/100)]
  FodUppg[, FodArMan := NULL]

  SES <- FodUppg[, .(LopNr, FodAr)][SES, on = "LopNr"]

  SES[Ar==1990, Ar :=1991]
  SES[Ar>2001 & is.na(ssyk_96_12) & SyssStat!=1, ssyk_96_12 := -99]
  SES[Ar>2001 & is.na(ssyk_96_12) & SyssStat==1, ssyk_96_12 := -98]
  SES[Ar==2021, Ar := 2022]
  SES <- SES[, .(LopNr, Ar, ssyk_96_12)]
  rm(list=setdiff(ls(), c("SES")))
  gc()

# Specify the name of the treatment variable
  tvar <- "T94_90"
  
# Specify the the birth cohorts to be used for the analysis
  lbyr <- 1942
  ubyr <- 1967
  
# Specify if only include Swedish citizens 
  citizen <- 1
  
# Specify the propensity score specification
  psformula <- formula(T94 ~
                         Kon + DAStod_1982 + DAStod_1983 + DAStod_1984 +
                         DAStod_1985 + DAStod_1986 + DAStod_1987 + DAStod_1988 +
                         DAStod_1989 +
                         DAntBarn_91 + Sambo_91 + InvBak  |
                         SSYK3_90 + SNI2_91 + Kommun_91 +
                         rCARB_1982 + rCARB_1983 + rCARB_1984 + rCARB_1985 +
                         rCARB_1986 + rCARB_1987 + rCARB_1988 + rCARB_1989 +
                         pArbInk_1990 + pArbInk_1991 + UtbAr_91 +
                         FodAr + Nom82 + Nom85 + Nom88 + Nom91)

## Estimate two models, one for the complete sample including all observations,
## the second on the restricted sample only including individuals for which
## we can observe all elections bw 82-18 in which they are eligible to run for office. 

  Models <-  vector('list', 10)
  j <- 1

 urval <- 1 
 
 for(i in 1:5) { 
  # Read in the data sets
  NomVald <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_NomVald.fst", as.data.table=T)
  RTB <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_RTB.fst", as.data.table=T)
  db <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_Nom.fst", as.data.table=T)
      
  db[, T94 := get(tvar)]
  mdb <- db[between(FodAr, lbyr, ubyr) & (SvMedb82_18 >= citizen) & (AllaVal >= urval),]
  rm(db)
  gc()
  
  # Split the in SES quartiles
  mdb[!is.na(avSES3), ravSES := cut_number(avSES3, 4, labels = FALSE)]
  if(i==1) mdb <- mdb[between(ravSES, 1, 1)]
  if(i==2) mdb <- mdb[between(ravSES, 2, 2)]
  if(i==3) mdb <- mdb[between(ravSES, 3, 3)]
  if(i==4) mdb <- mdb[between(ravSES, 4, 4)]
  
# Use logit model to estimate propensity scores
  mdb <- mdb[complete.cases(mdb),]
  tm <- proc.time()
  logmod <- feglm(psformula,  
                data = mdb, family="binomial")
  proc.time()-tm
  summary(logmod)

# Extract predicted probability (propensity scores)
  if(logmod$nobs == logmod$nobs_origin){ 
    mdb[, pT94 := predict(logmod, type = "response")]
  }else{
    mdb[logmod$obs_selection$obsRemoved, pT94 := predict(logmod, type = "response")]
  }

  mdb <- mdb[complete.cases(mdb),]

# Now use the Full matching approach of Sävje et al. to obtain ATT and ATE weights.  

  tmp <- proc.time()
  set.seed(7892)
  m.out <- matchit(T94~1, data = mdb,
                 distance = mdb$pT94, method = "quick", estimand = "ATT")
  proc.time()-tmp
  m.data1 <- as.data.table(match.data(m.out))[, .(LopNr, weights)]
  setnames(m.data1, "weights", "gm_att")

  mdb <- m.data1[, .(LopNr, gm_att)][mdb, on = "LopNr"]
  gc()
  RTB <- mdb[RTB, on = "LopNr"]

  rm(mdb)
  gc()

  rm(list=setdiff(ls(), c("NomVald", "RTB", "rtbvar", "Models", "j", "tvar", "lbyr", "ubyr", "citizen", "psformula", "urval", "SES")))
  gc()


  mdb <- RTB[!is.na(gm_att), 
             .(Kon, Nom82, Nom85, Nom88, Nom91, Sysselsatt_85, DAntBarn_91, Sambo_91, 
               DStud_91, DForLed_91, DSjukRe_91, DSocBidrFam_91, DBostBidrFam_91,
               UtbAr_91, FodAr, InvBak, isei, Yrke80_90, SNI2_90, Kommun_91, fp_LoneInk_85, fp_LoneInk_90, fp_LoneInk_91,
               gm_att, Nom, LopNr, T94, Ar, pT94, FodMan)]

  gc()
  
  mdb[, Nom := Nom*100]
  mdb[, cID := .N, by=.(LopNr, Ar)]
  #mdb <- mdb[(Ar-FodAr)<65, ]
  
  rm(RTB)
  gc()

  mdb <- SES[mdb, on = c("LopNr", "Ar")]
  
  mdb <- mdb[Ar %in% c(1985, 1991, 2002, 2006, 2010, 2014, 2018, 2022)]
  Models[[j]] <- feols(Nom~T94+i(Ar, T94, ref=1991) | Ar + ssyk_96_12^Ar, cluster ="LopNr", 
                       weights = mdb[, gm_att], 
                       mdb[])
  
  
  print(summary(Models[[j]]))
  print(uniqueN(mdb[T94==0, LopNr]))
  print(uniqueN(mdb[T94==1, LopNr]))
  print(uniqueN(mdb[, LopNr]))
  print(dim(mdb))
  print(qsu(mdb$cID))
  j <- j+1
}  
 
  valar <- c(1985, 1991, 2002, 2006, 2010, 2014, 2018, 2022)
  options(scipen=999)
  
for(j in c("fs", "rs")) {  
  for(i in 1:5) {
    
    ses <- ""
    if(i %in% c(1, 2, 3, 4)) ses <- paste0("Quartile ", i, " SES") 
    if(i %in% c(6, 7, 8, 9)) ses <- paste0("Quartile ", (i-5), " SES") 
    
    gi <- ggiplot(Models[[i]]) 
    gp <- ggplot(gi$data, aes(x, estimate)) +
      geom_point() + 
      geom_hline(yintercept=0, linetype="longdash") +
      geom_errorbar(aes(ymin=ci_low,ymax=ci_high), width=0.4, size=.6) +
      geom_line(linetype="dotted") +
      scale_x_continuous(breaks = valar,  labels = valar, limits = c(1984, 2023)) +
      scale_y_continuous(limits = c(-.3, .9), breaks = round(seq(-.3, .9, by = .1), digits=1)) +
      xlab("Election Year") +
      ylab("Effect on Candidacy") +
      ggtitle(ses) +
      theme_classic(base_size = 14) +
      theme(legend.title=element_blank()) +
      theme(legend.position = "bottom", panel.grid.major.y = element_line(color = "grey95"))
      
    gp
    #ggsave(paste0("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/Figures/Fig_", j, "_", i, ".pdf"), width = 10, height =7.5, units = "in") 
    
    assign(paste0("gp", i), gp)
  }  
}
  
  ggarrange(gp1, gp2, gp3, gp4)
  ggsave("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/FigureA17.pdf", width = 15, height =10, units = "in") 
  modelsummary(Models[c(1, 2, 3, 4)], output = "C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Tables/TableFigA17.tex")
  sink()